/*******************************************************************************

Name: Table 5 - main results (also includes results for Tables D1-D7)

Authors: Jonathan Baker, Lori Bennear, Sheila Olmstead

Input files:
	Primary panel: natlCWS_yr_Panel_mod-small_v2-Match.dta
	Alternative panel: natlCWS_yr_Panel_mod.dta
		
*******************************************************************************/


* ========================== START START START ==========================
clear
clear matrix
set more off

* ______________________________________________________________________________
* set paths and create log file
global root "C:/Users/jbaker/Documents/Research/SDWIS/Analysis/STATA"
global input "$root/Input"
global intermediate "$root/Intermediate"
global log "$root/LogFiles"
global output "$root/Output"
global outpanel "$root/OutputPanel"
global figures "$root/Figures"

log using "$log/Table5_mainResults.log", replace name(analysis)

* ============================================================================== 

* Create tradeoff panel

* ==============================================================================

** STEP 0: merge matched panel onto original panel with all violation types
use "$outpanel/natlCWS_yr_Panel_mod.dta", clear
	keep pwsidNUM year HLTH_*
	drop HLTH_999 // matched panel already has HLTH_999
	sort pwsidNUM year
	isid pwsidNUM year
	save "$intermediate/temp_cwspanel.dta", replace
	
use "$outpanel/natlCWS_yr_Panel_mod-small_v2-Match.dta", clear
	merge 1:1 pwsidNUM year using "$intermediate/temp_cwspanel.dta"
	sort pwsidNUM year
	isid pwsidNUM year
	save "$intermediate/temp_tradeoffPanel.dta", replace

erase "$intermediate/temp_cwspanel.dta"

** STEP 1: read in panel and keep necessary variables
local threshold "501 10k 100k"
foreach req of local threshold {
	use "$intermediate/temp_tradeoffPanel.dta", clear

	keep if id2001 == 1 & mflag`req' == 1
	 
	tabstat HLTH_*, stats(sum) c(s) // 

	// define Microbial health violations
	gen microb = HLTH_110 + HLTH_121 + HLTH_122 + HLTH_123 + HLTH_130 + HLTH_140
	gen dbp = HLTH_210 + HLTH_220 + HLTH_230

	// keep variables necessary for regression model
	rename HLTH_999 allhealth
	keep pwsid pwsidNUM year state allhealth microb dbp PostxT_* Postxsize Postxsize2 freq_`req'

	tabstat allhealth microb dbp, stats(n mean sum) c(s)


** STEP 2: Create panel following Blackwell (2005)
	    *  from original panel, make 2 panels, one for health, the other for MR

		* ------------------------------------------------------------------------------
		egen maxID = max(pwsidNUM) // will be used for creating unique pwsid code
		gen year2 = year
		gen state2 = state
		save "$intermediate/dummyPanel.dta", replace

		* ------------------------------------------------------------------------------
		** Panel 1: microbial contaminants
		drop dbp // and eventually will need to drop other variables
		gen eq_microb  = 1
		gen eqID = 1

		drop maxID // unneeded for the first panel

		rename microb viols
		rename (PostxT_501 PostxT_10k PostxT_100k) (PostxT_501_microb PostxT_10k_microb PostxT_100k_microb)
		rename (Postxsize Postxsize2) (Postxsize_microb Postxsize2_microb)

		order pwsidNUM year eq_microb
		sort pwsidNUM year
		save "$intermediate/TEMP-BWpanel_microb.dta", replace

		* ------------------------------------------------------------------------------
		** Panel 2: dbp contaminants
		use "$intermediate/dummyPanel.dta", clear
		drop microb // and eventually will need to drop other variables
		gen eq_dbp  = 1
		gen eqID = 2

		replace pwsidNUM = pwsidNUM + maxID // step 2 in creating unique pwsid ID code
		drop maxID // no longer needed
		replace year2 = year2 + 2000
		replace state2 = state2 + 100

		rename dbp viols
		rename (PostxT_501 PostxT_10k PostxT_100k) (PostxT_501_dbp PostxT_10k_dbp PostxT_100k_dbp)
		rename (Postxsize Postxsize2) (Postxsize_dbp Postxsize2_dbp)

		order pwsidNUM year eq_dbp
		sort pwsidNUM year
		save "$intermediate/TEMP-BWpanel_dbp.dta", replace

		* ------------------------------------------------------------------------------
		** Append Panel B to the end of Panel A
		use "$intermediate/TEMP-BWpanel_microb.dta", clear
		append using "$intermediate/TEMP-BWpanel_dbp.dta"
		order eqID viols pwsidNUM year

		foreach var of varlist eq_microb PostxT_501_microb PostxT_10k_microb PostxT_100k_microb Postxsize_microb Postxsize2_microb {
			quietly replace `var'=0 if `var'==. & eqID==2
			}
		foreach var of varlist eq_dbp PostxT_501_dbp PostxT_10k_dbp PostxT_100k_dbp Postxsize_dbp Postxsize2_dbp {
			quietly replace `var'=0 if `var'==. & eqID==1
			}

		sort pwsidNUM year
		save "$outpanel/TradeoffPanel_match_`req'.dta", replace
		}
		
erase "$intermediate/TEMP-BWpanel_microb.dta"
erase "$intermediate/TEMP-BWpanel_dbp.dta"
erase "$intermediate/dummyPanel.dta"
erase "$intermediate/temp_tradeoffPanel.dta"


* ============================================================================== 

* Table 5: Main specification (models 1, 2, 4, and 6)

* ==============================================================================
use "$outpanel/natlCWS_yr_Panel_mod-small_v2-Match.dta", clear
xtset pwsidNUM year

local DV "HLTH_999"
local id "id2001"

// publishing threshold
local req "501"
display "Table 5 (1) ------------------------------------"
reghdfe `DV' PostxT_501 Postxsize ///
	if `id'==1 & mflag`req'==1 [fweight=freq_`req'], ///
	a(i.pwsidNUM i.state#i.year) vce(cl pwsidNUM)
display "Model: `e(cmdline)'"
estimates save "$output/sysFEstyrFEfsize-`id'-`req'.ster", replace

display "Table 5 (2) ------------------------------------"	
reghdfe `DV' PostxT_`req' Postxsize Postxsize2 ///
	if `id'==1 & mflag`req'==1 [fweight=freq_`req'], ///
	a(i.pwsidNUM i.state#i.year) vce(cl pwsidNUM)
display "Model: `e(cmdline)'"
estimates save "$output/sysFEstyrFEfsize2-`id'-`req'.ster", replace

// mailing threshold
local req "10k"
display "Table 5 (4) ------------------------------------"
reghdfe `DV' PostxT_`req' Postxsize Postxsize2 ///
	if `id'==1 & mflag`req'==1 [fweight=freq_`req'], ///
	a(i.pwsidNUM i.state#i.year) vce(cl pwsidNUM)
display "Model: `e(cmdline)'"
estimates save "$output/sysFEstyrFEfsize2-`id'-`req'.ster", replace

// web threshold
local req "100k"
display "Table 5 (6) ------------------------------------"
reghdfe `DV' PostxT_`req' Postxsize Postxsize2 ///
	if `id'==1 & mflag`req'==1 [fweight=freq_`req'], ///
	a(i.pwsidNUM i.state#i.year) vce(cl pwsidNUM)
display "Model: `e(cmdline)')"
estimates save "$output/sysFEstyrFEfsize2-`id'-`req'.ster", replace

* ============================================================================== 

* Table 5: Main specification (microbial vs. DBP violations: models 3, 5, and 7)
* (also includes results for Table D1)

* ==============================================================================
local threshold "501 10k 100k"
local tick 3
foreach req of local threshold {
	use "$outpanel/TradeoffPanel_match_`req'.dta", clear
	
	/*
	reghdfe viols PostxT_`req'_microb PostxT_`req'_dbp [fweight=freq_`req'], ///
		a(i.state2#i.year2 i.pwsidNUM) vce(cl pwsidNUM)
	display "Model: `e(cmdline)'"
	estimates save "$output/tradeoff-microbdbp-sysFEstyrFE-id2001-`req'.ster", replace
	test (PostxT_`req'_microb - PostxT_`req'_dbp = 0)
	*/

	display "Table 5 (`tick') ------------------------------------"
	reghdfe viols PostxT_`req'_microb Postxsize_microb Postxsize2_microb ///
		PostxT_`req'_dbp Postxsize_dbp Postxsize2_dbp [fweight=freq_`req'], ///
		a(i.state2#i.year2 i.pwsidNUM) vce(cl pwsidNUM)
	display "Model: `e(cmdline)'"
	estimates save "$output/tradeoff-microbdbp-sysFEstyrFEfs-id2001-`req'.ster", replace
	test (PostxT_`req'_microb - PostxT_`req'_dbp = 0)
	local tick = `tick' + 2
	}


* ============================================================================== 

* Table 5: Main specification (dose response: models 8 and 9)

* ==============================================================================
// the following block of code borrows from tradeoffAnalysis_v3.do to merge back
//   into the matched dataset the specific violation types. This allows for a
//   definition of microbial violations
	use "$outpanel/natlCWS_yr_Panel_mod.dta", clear
		keep pwsidNUM year HLTH_*
		drop HLTH_999 // matched panel already has HLTH_999
		sort pwsidNUM year
		isid pwsidNUM year
		save "$intermediate/temp_cwspanel.dta", replace
		
	use "$outpanel/natlCWS_yr_Panel_mod-small_v2-Match.dta", clear
		merge 1:1 pwsidNUM year using "$intermediate/temp_cwspanel.dta"
		sort pwsidNUM year
		isid pwsidNUM year
		save "$intermediate/temp_tradeoffPanel.dta", replace

	erase "$intermediate/temp_cwspanel.dta"

	gen microb = HLTH_110 + HLTH_121 + HLTH_122 + HLTH_123 + HLTH_130 + HLTH_140

local depvar "all microb"
local tick 8
foreach y of local depvar {

	// the following if statement sets the dependent variable
	if "`y'" == "all" {
		local DV = "HLTH_999"
		}
	else if "`y'" == "microb" {
		local DV = "`y'"
		}

display "================= Dependent variable is set to `DV' ================="

local sampleid "id2001"
foreach id of local sampleid {
    /*
	display "501 Threshold: all fixed effects plus f(size)^2, `id' ** 501 & 10k **"
	reghdfe `DV' PostxT_501 PostxT_10k Postxsize Postxsize2 ///
		if `id'==1 & mflag501==1 [fweight=freq_501], ///
		a(i.pwsidNUM i.state#i.year) vce(cl pwsidNUM)
	display "Model: `e(cmdline)'"
	estimates save "$output/sysFEstyrFEfsize2-`DV'-`id'-pubMail.ster", replace
	*/
	
	display "Table 5 (`tick') ------------------------------------"
	reghdfe `DV' PostxT_501 PostxT_10k PostxT_100k Postxsize Postxsize2 ///
		if `id'==1 & mflag501==1 [fweight=freq_501], ///
		a(i.pwsidNUM i.state#i.year) vce(cl pwsidNUM)
	display "Model: `e(cmdline)'"
	estimates save "$output/sysFEstyrFEfsize2-`DV'-`id'-pubMailWeb.ster", replace
	local tick = `tick' + 1
}
}

* ============================================================================== 

* Tables D2-D5: proxy policy in 1994, 1995, 1996, & 1997

* ==============================================================================
local DV "HLTH_999"
local proxies 1994 1995 1996 1997
foreach proxy_year of local proxies {

local tick = `proxy_year' - 1992

use "$outpanel/natlCWS_yr_Panel_mod-small_v2-Match.dta", clear
xtset pwsidNUM year
display "Proxy Year is `proxy_year' and dependent variable is set to `DV'"

	gen proxy = 0 // proxy policy
	replace proxy = 1 if year >= `proxy_year'
	label variable proxy "after proxy ruling (`proxy_year')"

	local threshold "501 10k 100k"
	foreach req of local threshold {
		gen proxyxT_`req' = proxy * T_`req'
		}
	gen proxyxsize = proxy*size
	gen proxyxsize2 = proxyxsize^2

	local sampleid "id2001"
	local modelno = 1
	foreach req of local threshold { 
	   	foreach id of local sampleid {
		    /*
			reghdfe `DV' proxyxT_`req' ///
				if `id'==1 & year < 1998 & mflag`req'==1 [fweight=freq_`req'], ///
				a(i.pwsidNUM i.state#i.year) vce(cl pwsidNUM)
			display "Model: `e(cmdline)'"
			estimates save "$output/Proxy`proxy_year'-sysFEstyrFE-`id'-`req'.ster", replace
			*/
			
			display "Table D`tick' (`modelno') ------------------------------------"
			reghdfe `DV' proxyxT_`req' proxyxsize proxyxsize2 ///
				if `id'==1 & year < 1998 & mflag`req'==1 [fweight=freq_`req'], ///
				a(i.pwsidNUM i.state#i.year) vce(cl pwsidNUM)
			display "Model: `e(cmdline)'"
			estimates save "$output/Proxy`proxy_year'-sysFEstyrFEfsize2-`id'-`req'.ster", replace
			local modelno = `modelno' + 1
		}
	}
}

* ============================================================================== 

* Table D6: donut regressions

* ==============================================================================
use "$outpanel/natlCWS_yr_Panel_mod-small_v2-Match.dta", clear
local DV "HLTH_999"
local donut "10 20 30"
local sampleid "id2001"
foreach pct of local donut { 
	foreach id of local sampleid {
	// publishing threshold ----------------------------------------------------
	local min = 500 - 500*(`pct'/100)
	local max = 500 + 500*(`pct'/100)
	
	/*
	reghdfe `DV' PostxT_501 ///
		if `id'==1 & (pop < `min' | pop > `max') & mflag501==1 [fweight=freq_501], ///
		a(i.pwsidNUM i.state#i.year) vce(cl pwsidNUM)
	display "Model: `e(cmdline)'"
	estimates save "$output/sysFEstyrFE-`id'-501-`pct'pct.ster", replace
	*/
	local tick = `pct'/10
	display "Table D6: (`tick') ------------------------------------"
	reghdfe `DV' PostxT_501 Postxsize Postxsize2 ///
		if `id'==1 & (pop < `min' | pop > `max') & mflag501==1 [fweight=freq_501], ///
		a(i.pwsidNUM i.state#i.year) vce(cl pwsidNUM)
	display "Model: `e(cmdline)'"
	estimates save "$output/sysFEstyrFEfsize2-`id'-501-`pct'pct.ster", replace
	
	/*
	// mailing threshold -------------------------------------------------------
	local min = 10000 - 10000*(`pct'/100)
	local max = 10000 + 10000*(`pct'/100)
		
	reghdfe `DV' PostxT_10k ///
		if `id'==1 & (pop < `min' | pop > `max') & mflag10k==1 [fweight=freq_10k], ///
		a(i.pwsidNUM i.state#i.year) vce(cl pwsidNUM)
	display "Model: `e(cmdline)'"
	estimates save "$output/sysFEstyrFE-`id'-10k-`pct'pct.ster", replace

	reghdfe `DV' PostxT_10k Postxsize Postxsize2 ///
		if `id'==1 & (pop < `min' | pop > `max') & mflag10k==1 [fweight=freq_10k], ///
		a(i.pwsidNUM i.state#i.year) vce(cl pwsidNUM)
	display "Model: `e(cmdline)'"
	estimates save "$output/sysFEstyrFEfsize2-`id'-10k-`pct'pct.ster", replace

	// online threshold --------------------------------------------------------
	local min = 100000 - 100000*(`pct'/100)
	local max = 100000 + 100000*(`pct'/100)
		
	reghdfe `DV' PostxT_100k ///
		if `id'==1 & (pop < `min' | pop > `max') & mflag100k==1 [fweight=freq_100k], ///
		a(i.pwsidNUM i.state#i.year) vce(cl pwsidNUM)
	display "Model: `e(cmdline)'"
	estimates save "$output/sysFEstyrFE-`id'-100k-`pct'pct.ster", replace

	reghdfe `DV' PostxT_100k Postxsize Postxsize2 ///
		if `id'==1 & (pop < `min' | pop > `max') & mflag100k==1 [fweight=freq_100k], ///
		a(i.pwsidNUM i.state#i.year) vce(cl pwsidNUM)
	display "Model: `e(cmdline)'"
	estimates save "$output/sysFEstyrFEfsize2-`id'-100k-`pct'pct.ster", replace
	*/
	}
}

* ============================================================================== 

* Table D7: isolate treatment effect

* ==============================================================================
use "$outpanel/natlCWS_yr_Panel_mod-small_v2-Match.dta", clear
local id "id2001"
local DV "HLTH_999"

// PUBLISHING THRESHOLD
display "Table D7: (1) ------------------------------------"
reghdfe `DV' PostxT_501 ///
	if `id'==1 & inrange(pop,0,9999) & mflag501==1 [fweight=freq_501], ///
	a(i.pwsidNUM i.state#i.year) vce(cl pwsidNUM)
display "Model: `e(cmdline)'"
estimates save "$output/sysFEstyrFE-`id'-501-0to10k.ster", replace

display "Table D7: (2) ------------------------------------"
reghdfe `DV' PostxT_501 Postxsize Postxsize2 ///
if `id'==1 & inrange(pop,0,9999) & mflag501==1 [fweight=freq_501], ///
a(i.pwsidNUM i.state#i.year) vce(cl pwsidNUM)
	display "Model: `e(cmdline)'"
	estimates save "$output/sysFEstyrFEfsize2-`id'-501-0to10k.ster", replace

// MAILING THRESHOLD 
display "Table D7: (3) ------------------------------------"	
reghdfe `DV' PostxT_10k ///
	if `id'==1 & inrange(pop,501,99999) & mflag10k==1 [fweight=freq_10k], ///
	a(i.pwsidNUM i.state#i.year) vce(cl pwsidNUM)
display "Model: `e(cmdline)'"
estimates save "$output/sysFEstyrFE-`id'-10k-500to100k.ster", replace

display "Table D7: (4) ------------------------------------"
reghdfe `DV' PostxT_10k Postxsize Postxsize2 ///
if `id'==1 & inrange(pop,501,99999) & mflag10k==1 [fweight=freq_10k], ///
a(i.pwsidNUM i.state#i.year) vce(cl pwsidNUM)
	display "Model: `e(cmdline)'"
	estimates save "$output/sysFEstyrFEfsize2-`id'-10k-500to100k.ster", replace

/*
* ============================================================================== 

* Main specification (all models)

* ==============================================================================
use "$outpanel/natlCWS_yr_Panel_mod-small_v2-Match.dta", clear
xtset pwsidNUM year

local DV "HLTH_999"
display "Dependent variable is set to `DV'"

local threshold "501 10k 100k"
local sampleid "id2001" 
foreach req of local threshold { 
	foreach id of local sampleid {

		reg `DV' PostxT_`req' Post T_`req' ///
		if `id'==1 & mflag`req'==1 [fweight=freq_`req'], ///
		robust cl(pwsidNUM)
			display "Model: `e(cmdline)'"
			estimates save "$output/pooled-`id'-`req'.ster", replace

		reghdfe `DV' PostxT_`req' Post ///
		if `id'==1 & mflag`req'==1 [fweight=freq_`req'], ///
		a(i.pwsidNUM) vce(cl pwsidNUM)
			display "Model: `e(cmdline)'"
			estimates save "$output/sysFE-`id'-`req'.ster", replace

		reghdfe `DV' PostxT_`req' ///
		if `id'==1 & mflag`req'==1 [fweight=freq_`req'], ///
		a(i.pwsidNUM i.year) vce(cl pwsidNUM)
			display "Model: `e(cmdline)'"
			estimates save "$output/sysFEyrFE-`id'-`req'.ster", replace

		reghdfe `DV' PostxT_`req' ///
		if `id'==1 & mflag`req'==1 [fweight=freq_`req'], ///
		a(i.pwsidNUM i.state#i.year) vce(cl pwsidNUM)
			display "Model: `e(cmdline)'"
			estimates save "$output/sysFEstyrFE-`id'-`req'.ster", replace

		reghdfe `DV' PostxT_`req' Postxsize ///
		if `id'==1 & mflag`req'==1 [fweight=freq_`req'], ///
		a(i.pwsidNUM i.state#i.year) vce(cl pwsidNUM)
			display "Model: `e(cmdline)'"
			estimates save "$output/sysFEstyrFEfsize-`id'-`req'.ster", replace
	
		reghdfe `DV' PostxT_`req' Postxsize Postxsize2 ///
		if `id'==1 & mflag`req'==1 [fweight=freq_`req'], ///
		a(i.pwsidNUM i.state#i.year) vce(cl pwsidNUM)
			display "Model: `e(cmdline)'"
			estimates save "$output/sysFEstyrFEfsize2-`id'-`req'.ster", replace
	}
}
*/
* ========================== DONE DONE DONE ==========================
log close analysis
